##########################################
### Introduction to R
### Samantha Zuhlke
###
### This R script provides an introduction to 
### basic coding in R. In particular, it 
### reviews the basic coding structure of R 
### and basic summary statistics. 
##########################################
##########################################
##########################################
##########################################
##########################################

##########################################
# begin Preamble: Every R script has a preamble. 
# The preamble contains the necessary packages and setting your workspace.
# You must include a preamble at the beginning of every R script.

#PACKAGES
# Install the necessary programs (called packages) to perform specific R tasks.
# If you are using your laptop, you will need to install each package using the command:
# install.packages("PACKAGE NAME GOES HERE"). Don't worry too much if you can't get them to 
# install... they are not necessary for the tasks in this tutorial.

# to comment out, use the "#" sign. Note, I've commented these out because they are 
# already installed on my computer. If you have never installed them before, run the code.

# install.packages("foreign")
# install.packages("readstata13")
# install.packages("ggplot2")
# install.packages("lattice")
# install.packages("car")
# install.packages("dplyr")

require(foreign)
require(readstata13)
require(ggplot2) # two, not z
require(lattice)
require(car)
require(dplyr)

# SETTING YOUR WORKSPACE
# Tell R WHERE to find the data and save all the files for your project: 
# selecting, analyzing, and storing the data. 
# This area is called your WORKSPACE.
# I suggest creating a folder called "R Workspace" on your desktop or on the TAMU S drive.

# Check where your current workspace is set 
getwd()
# This is a good example of running code and seeing an output: R prints the
# current working directory to the console.

# Set your workspace 
# The best way to get the file directory is to copy and paste it from the computer.

# The path below is specific to one computer; replace it with your own folder.
setwd("/Users/samanthazuhlke/Desktop/R Workspace/POLS_309") 
# Note, if you are using a PC, a file path copied from Windows uses backslashes (\).
# Inside an R string, change them to forward slashes (/) or double them (\\).
# I wrote this code on a mac.

# The file path will be unique given which computer you are on, so you may need to change
# it each time you work with R (unless you are on a personal computer). 
# Regardless, it's good practice to set your workspace each time you use R.

# Note, you may want to use the "here" package with students instead, 
## which makes setting workspaces much easier. 

# In class exercise: Use code to check that you set your workspace correctly. 

# End Preamble
#####################################################################
# Note: This is the end of your "Preamble." Every time you use R, you must
# include the information from the preamble: 
# name and date, required packages, setting the working directory
#####################################################################

# USING DATA IN R
# R can be used as a calculator 
2 + 2

# ASSIGNING VALUES AND SMALL DATA SETS IN R 
# You have to tell R what objects are. This makes R both flexible and annoying.
# "<-" is the assignment operator in R. Our example below is saying "Hey R, 
# make a variable called x and assign it a value of 2."
# More formally, a stored value like this is called an "object."
# Objects can be constants, vectors, matrices, code output, etc.

x <- 2
x <- 4 # R will overwrite with no warning
x <- 2 # set x back to 2

X <- 4 # R is case sensitive: X and x are two different objects

# To recall our value we type in the name of our new variable.
x 
X

# In class exercise: What happens when you add x + x ?

# Clearing assigned values
rm(x) # removes only x; X is still defined
# or, can clear your whole workspace with the following line of code: 
rm(list=ls()) # removes every object that ls() lists, including X

# Create a variable with more than one value.
# More formally, this is called a vector (an ordered sequence of values).
# c() combines the individual values into a single vector.
x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

# In class exercise: What happens when you add x + x?
# Addition is element-wise: each value is added to the value in the same position.
x + x
# Save the output as a new object. The name "sum" is avoided on purpose because
# it is already the name of R's built-in sum() function.
x_doubled <- x + x

# SUMMARY STATISTICS
# If we want to see our data
View(x) # opens x in a spreadsheet-style viewer
# Note, R is case sensitive. For example, try to run view(X).

# If we want to summarize our data
# Summary is a useful command that I use on every project.
summary(x)

# In class exercise: Interpret the output from the Summary command.

# We can also individually ask for each of these statistics
# The mean
mean(x)

# median
median(x)

# minimum
min(x)

# maximum
max(x)

# In class exercise: How do these values compare to what the summary command reported?

# standard deviation 
sd(x)

# variance 
var(x)

# In class exercise: what about mode? 

mode(x)
# mode() does not give the statistical mode (the most frequent value). It
# reports how R stores the object (for example, "numeric").
# There is no preset command in base R for the statistical mode.

# Mode: Introduction to Functions
# In R, we can write a program to calculate the mode for us. 

# Mode: return the most frequent value in x.
#   x: a vector of values.
# Returns a single value of the same type as x. When several values are tied
# for most frequent, the one that appears first in x is returned.
Mode <- function(x) {
  # The distinct values, in order of first appearance.
  distinct_values <- unique(x)
  # For every element of x, the position of its value in distinct_values.
  positions <- match(x, distinct_values)
  # How many times each distinct value occurs.
  counts <- tabulate(positions)
  # which.max() picks the first position holding the largest count.
  distinct_values[which.max(counts)]
}
Mode(x) # every value in x occurs once, so the first value is returned

# apply our function to something more obvious
m <- c(1,2,1,1,0) 
Mode(m) # 1 occurs three times, more often than any other value


# PLOTTING DATA

# plot is a great tool we can use to visually inspect our data. 
# With a single vector, plot() draws each value against its position (index).
plot(x)

# In class question: Interpret the plot.

# plot to compare multiple variables
# In class exercise: Create a new variable y, with 10 observations.

y <- c(4,4,5,2,9,10,1,3,3,8)
summary(y)

# scatterplot 
plot(x,y)
# plot(independent variable, dependent variable): the first argument goes on
# the horizontal axis and the second on the vertical axis.

# Put two plots on top of each other
# Say we want to compare two variables on the same graph. There are multiple ways to do this. 
# This file will introduce two ways to code this. Let's clear our existing data and redefine our variables.
rm(x,y,m)
# Redefine variables.
x1 <- c(3, 1, 2, 2, 2, 6, 3, 4, 5, 3)
x2 <- c(7, 10, 9, 9, 9, 4, 7, 6, 5, 7)
y <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

# x1 and x2 cover different ranges, so every plot below uses one shared
# horizontal range. Without it, the axis would be sized for x1 only and
# some of the x2 points would fall outside the plot.
x_range <- range(x1, x2)

# The first way places point data on top of a plot. First, we plot x1 against y.
plot(x1, y, xlim = x_range)
# Then we add x2 against the same y on top of our first plot.
# points() needs both coordinates: given x2 alone it would put x2 on the
# vertical axis against its position, which cannot be compared with plot(x1, y).
points(x2, y, col = 2)
# This lets us visually compare patterns in x1 against x2.

# The second way places two graphs on top of each other.
# Both plots get the same xlim so the two layers line up on the visible axis.
plot(x1, y, xlim = x_range, xlab = "x values", col = "green")
par(new = TRUE) # the next plot() draws over the current plot instead of replacing it
plot(x2, y, xlim = x_range, axes = FALSE, ann = FALSE, col = "red")

# Note, these two codes perform the same essential task, but are coded differently.

# Exporting/Saving Graphics
# There are multiple ways to export graphs in R so they may be used in other programs.
# R can create PDFs(*best option usually), .wmf, .png, JPEGS, .bmp, and .ps files. 
# Using the menus: Export > Save as PDF
# Using code:
# Tells R to make a PDF of what comes next. The file is written to your
# workspace (working directory). For pdf(), width and height are measured in
# inches (not pixels), so this page is 8 inches wide and 7 inches tall.
pdf(file = "Graph1.pdf", width = 8, height = 7)
plot(x1, y)
dev.off() # Tells R to finish making the PDF and close the file.

# LOADING DATA INTO R

# Download and open the excel file, FakeData.xls.
# Using Excel, save this file as a .csv file, in Excel, using "Save As." 
# Save the .csv file in your workspace.
# BE SURE TO SAVE THE .CSV FILE IN YOUR WORKSPACE. 

# Load in the dataset using the following code:
# NOTE(review): the name inside the quotes must exactly match the name the .csv
# file was saved under. The instructions above mention FakeData.xls, so confirm
# that the saved file is really called fake.data.csv.
fake.data <-read.csv("fake.data.csv")

# View the Data
View(fake.data)

# Examine the data
summary(fake.data)

# VISUALIZATIONS 

## histogram
# Two ways to draw a histogram of X4: ggplot2 first, then base R.
# ggplot() + geom_histogram() is used instead of qplot(), which is deprecated
# in current versions of ggplot2.
ggplot(fake.data, aes(x = X4)) +
  geom_histogram()
hist(fake.data$X4)

## create a scatterplot comparing X4 and Y
plot(fake.data$X4, fake.data$Y)
# add a trend line: the fitted line from a linear regression of Y on X4
abline(reg = lm(Y ~ X4, data = fake.data), lty = 1, lwd = 4, col = "purple")


# Fun with BarPlots
# Compute the mean of each variable in the data set.
mean_X1 <- mean(fake.data$X1)
mean_X2 <- mean(fake.data$X2)
mean_X3 <- mean(fake.data$X3)
mean_X4 <- mean(fake.data$X4)
mean_X5 <- mean(fake.data$X5)

# Collect the five means into one named vector. barplot() draws one bar per
# value and uses the names as the bar labels.
means <- c(
  X1 = mean_X1,
  X2 = mean_X2,
  X3 = mean_X3,
  X4 = mean_X4,
  X5 = mean_X5
)

barplot(means)

# HELP FUNCTION
# You can usually ask Google, use the Help window, or type directly into the console
# by putting a ? in front of the command or by typing help().
# Ex. 
? hist
help(barplot)

